// close any log left open by an earlier run (the error raised when no log is
// open is suppressed), then start a fresh plain-text log for this do-file
cap log close
log using daly-data, text replace

// program:	daly-data.do
// task:	data cleaning for DALY health state survey
// input:	daly-weights.csv daly-raw.xls
// output:	daly-data.dta daly-weights.dta daly-data.log
// project: DALY and value judgments
// author:  sam harper \ 21apr2015

//  #0
//  program setup

version 12          // run the commands below under version 12 interpretation
clear all           // start with nothing in memory
macro drop _all     // discard macros left over from earlier work
set linesize 80     // width of output lines written to the log


// #1
// import data

* GBD disability weights: read the CSV and store it as a Stata dataset so it
* can be merged onto the survey responses later in this file
import delimited "daly-weights.csv", clear encoding(ISO-8859-1)
saveold daly-weights, version(12) replace

* raw survey export; the first spreadsheet row supplies the variable names
import excel "daly-raw.xls", firstrow clear ///
	sheet("DALYsurveyV2-13 Feb 2015.txt") cellrange(A1:BE1593)

* survey meta data: remove the time field, then describe what is kept
drop time
label variable wid       "worker ID"
label variable cfwid     "crowdflower ID"
label variable ipaddress "user IP address"
label variable wscore    "weighted score"
label variable begin     "survey begin time"
label variable end       "survey end time"

* elapsed survey time = end - begin, held as a double and displayed as mm:ss
generate double stime = end - begin
format stime %tcmm:ss
label variable stime "survey time to completion"

* rescale and categorize minutes to complete the survey
* (stime is divided by 1000*60 to express it in minutes)
gen minutes = stime / (1000*60)
label var minutes "minutes to complete survey"

* minutes is continuous, so the categories are built from half-open intervals
* [0,10), [10,20) and [20,+inf). Closed recode ranges such as 0/9.99 and
* 10/19.99 leave values like 9.995 or 19.995 unmatched, and recode with gen()
* then copies the raw value into the new variable instead of a category code.
* Negative or missing durations are left missing in timecat.
gen byte timecat = .
replace timecat = 0 if minutes >= 0  & minutes < 10
replace timecat = 1 if minutes >= 10 & minutes < 20
replace timecat = 2 if minutes >= 20 & !missing(minutes)
label define timecat 0 "<10 mins" 1 "10-19 mins" 2 "20+ mins", modify
label values timecat timecat
tab timecat
label var timecat "survey time to completion"

* drop a few variables, for the reasons noted:
*   status   - all "complete"
*   language - all responses in English
*   browser  - irrelevant
drop status language browser

* shift the survey version down by one and attach the v1/v2 value labels
label define survey 0 "v1" 1 "v2", modify
replace survey = survey - 1
label values survey survey
label variable survey "survey version"


	
// #2
// reshape dataset to long, create demographic characteristics

* q41 holds the respondent's age; it is renamed before the reshape so it is
* kept as a respondent-level variable rather than stacked with the q* items
rename q41 age
label variable age "respondent age"

* one row per respondent-question: the q* columns are stacked into q and the
* numeric suffix of each column is stored in question
reshape long q, j(question) i(wid age gender educ race survey)
label variable question "survey question"

* the stacked response is the health rating
rename q rating
label variable rating "health rating"

* Value labels for question: each question number (0-40) is mapped to a
* shortened version of its health state description, and the label set is
* then attached to the question variable.
* Codes 0, 15, 21, 26 and 33 are the ones dropped later in this file as
* "dummy questions".
* NOTE(review): codes 5 and 13 carry the same shortened text, and codes 4 and
* 8 differ only by the word "a" - presumably the full descriptions differ;
* confirm against the survey instrument.
* NOTE(review): the label for code 38 spells "maintaning"; left unchanged
* here because the label text is stored in the dataset.
label define question ///
	0 "Rating=53" ///
	1 "Severe chest injury" ///
	2 "Cannot move without help" ///
	3 "Tired, itching, cramps..." ///
	4 "Confined to bed or wheelchair..." ///
	5 "Paralyzed from the neck down..." ///
	6 "Frequent headaches, memory problems..." ///
	7 "Scars caused by burns..." ///
	8 "Confined to bed or a wheelchair..." ///
	9 "Hyperactive, hears and believes..." ///
	10 "Pouch attached to an opening..." ///
	11 "One breast removed..." ///
	12 "Needs help walking..." ///
	13 "Paralyzed from the neck down..." ///
	14 "Low intelligence..." ///
	15 "Not breathing and has no pulse..." ///
	16 "Lost a lot of weight..." ///
	17 "Severe tremors and moves slow..." ///
	18 "Abnormal opening between her vagina..." ///
	19 "Painful burn over a large part..." ///
	20 "Some difficulty in moving around..." ///
	21 "Rating=53" ///
	22 "Slurred speech/difficulty swallowing..." ///
	23 "Persistent cough and fever..." ///
	24 "Large mass in front of the neck..." ///
	25 "Uses heroin daily..." ///
	26 "Rating=44" ///
	27 "Hears/sees things not real..." ///
	28 "Lost more than 20..." ///
	29 "Complete memory loss..." ///
	30 "Diarrhea 3+ times a day..." ///
	31 "Blistering skin rash..." ///
	32 "Severe, constant pain and deformity..." ///
	33 "No aches/pains, has no difficulty..." ///
	34 "Cannot think clearly/frequent headaches." ///
	35 "Paralyzed from the waist down..." ///
	36 "Pain/deformity in most joints..." ///
	37 "Severe vision loss..." ///
	38 "Difficulty in obtaining/maintaning erect" ///
	39 "Abnormal opening between bladder..." ///
	40 "Severe weight loss, weakness, fatigue...", modify
label values question question

* age: relabel, then collapse into four bands
label variable age "Age (years)"
recode age (14/24=0 "14-24y") (25/34=1 "25-34y") (35/44=2 "35-44y") ///
	(45/120=3 "45+"), generate(age4)
label variable age4 "age (4 categories)"

* gender: swap the string variable for its numeric encoding under the same name
encode gender, generate(genderexp)
drop gender
rename genderexp gender

* indicator for women, built from the encoded gender codes
* NOTE(review): the numeric codes used here and in the race and education
* recodes below are whatever encode assigned to the raw strings - confirm
* them with -label list- if the raw data change
recode gender (2=0 "no") (1 3 = .) (4=1 "yes"), generate(woman)
label variable woman "respondent is woman?"

* race: keep the full encoding, then collapse it into four groups
encode race, generate(raceexp)
label variable raceexp "expanded race"
drop race

recode raceexp (14=0 "White") (7=1 "Indian") (2=2 "Asian") ///
	(1 3/6 8/13=3 "Other"), generate(race4)
tabulate raceexp race4
label variable race4 "race (4 categories)"

* education: encode, then collapse into three levels (code 1 set to missing)
encode educ, generate(educ2)
recode educ2 (3/4=0 "<=HS") (5=1 "Univ degree") (2=2 "Graduate+") ///
	(1=.), generate(educ3)
tabulate educ2 educ3
label variable educ3 "highest education completed"


	
// #3
// merge with GBD 2010 disability weights

* attach the saved GBD weights to the response rows, matching on question
merge m:1 question using daly-weights
drop _merge

* drop dummy questions
drop if inlist(question, 0, 15, 21, 26, 33)

* which survey version carried the treated wording of each question
label variable treated "which survey question treated?"
label define treated 0 "v2 treated" 1 "v1 treated"
label values treated treated

* binary indicator for with versus without information:
* added is 1 for a v1 respondent (survey==0) on a v1-treated question or a
* v2 respondent (survey==1) on a v2-treated question, and 0 otherwise
generate added = (survey==0 & treated==1) | (survey==1 & treated==0)
label define noyes 0 "no" 1 "yes", modify
label values added noyes
label variable added "added information?"

* category of additional information
label define cat 1 "psychological" 2 "familial" 3 "social", modify
label values cat cat
label variable cat "category of added information"

* GBD health state descriptions: replace the string with a labelled numeric
encode gbddesc, generate(gbdd)
label variable gbdd "GBD health states"
drop gbddesc

* GBD disability weights, with a note recording where they came from
label variable dw "GBD 2010 disability weight"
notes dw: "Extracted from IHME website on 16apr2015 by sh http://ghdx.healthdata.org/record/global-burden-disease-study-2010-gbd-2010-disability-weights"

* categorize disability weights into [0,.25), [.25,.5) and [.5,+inf)
* - the top category uses >= so that a weight of exactly 0.5 is coded 2
*   ("0.50+") rather than being left missing
* - !missing() is used instead of != . so that extended missing values
*   (.a-.z), which compare greater than any number, are not coded 2
gen dwc = .
replace dwc = 0 if dw < .25
replace dwc = 1 if dw >= .25 & dw < .5
replace dwc = 2 if dw >= .5 & !missing(dw)
label var dwc "disability weight category"
label define dwc 0 "<0.25" 1 "0.25-0.49" 2 "0.50+", modify
label values dwc dwc

* dataset label records the content, this do-file's name and the run date
label data "Health state description data \ daly-data.do $S_DATE sh"

* reset the data signature on the finished data, then save it in the older
* dataset format requested by version(12)
datasignature set, reset
saveold daly-data, replace version(12)

* close the log opened at the top of the file and stop; nothing after exit
* is executed
log close
exit



